package com.jielin.handling.util.lucene;


import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;

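/**
 * @Description Utility for extracting the text content of a PDF file with PDFBox,
 *              either as a String or written out to a text file.
 * @Author WR
 * @Date 2020-10-14 13:24
 * @Version 1.0
 */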
public class ExtractorPDF {

    /**
     * Extracts the text of a PDF file and returns it as a single string.
     *
     * <p>I/O failures are not propagated: the stack trace is printed and an
     * empty string is returned instead.
     *
     * @param file path of the PDF file to read
     * @return the extracted text, or {@code ""} if the file could not be
     *         loaded or its text could not be extracted
     */
    public static String getText(String file) {
        String text = "";
        PDDocument document = null;
        try {
            document = PDDocument.load(new File(file));
            // Default stripper; an explicit encoding (e.g. "GBK") was tried previously and dropped.
            PDFTextStripper stripper = new PDFTextStripper();
            text = stripper.getText(document);
        } catch (IOException e) {
            // Keep the historical contract: report the failure and fall through to return "".
            e.printStackTrace();
        } finally {
            closeDocument(document);
        }
        return text;
    }

    /**
     * Extracts the text of a PDF file and writes it to a text file.
     *
     * <p>The output file is only created once the PDF has been loaded
     * successfully. The writer is always flushed and closed, so the extracted
     * text is fully written to disk and the file handle is released before
     * this method returns.
     *
     * <p>{@link IOException}s are caught and their stack trace printed rather
     * than propagated; the broad {@code throws Exception} clause is retained
     * so existing callers keep compiling.
     *
     * @param doc      path of the PDF file to read
     * @param filename path of the text file to write; the file is written
     *                 using the default charset of {@link FileWriter}
     * @throws Exception retained for backward compatibility with existing callers
     */
    public static void toTextFile(String doc, String filename) throws Exception {
        PDDocument document = null;
        try {
            document = PDDocument.load(new File(doc));
            PDFTextStripper stripper = new PDFTextStripper();
            // try-with-resources guarantees flush + close of the writer; without it the
            // buffered output may never reach the file and the handle is leaked.
            try (PrintWriter writer = new PrintWriter(new FileWriter(filename))) {
                stripper.writeText(document, writer);
            }
        } catch (IOException e) {
            // Keep the historical contract: report the failure without propagating it.
            e.printStackTrace();
        } finally {
            closeDocument(document);
        }
    }

    /**
     * Closes the given document if it was opened, printing (not propagating)
     * any failure raised while closing.
     *
     * @param document the document to close; may be {@code null}
     */
    private static void closeDocument(PDDocument document) {
        if (document == null) {
            return;
        }
        try {
            document.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Manual smoke test: prints the text of a sample PDF to standard output
     * and then writes the same text to a {@code .txt} file next to it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            String sc = getText("D:\\data\\recordpdfs\\0的笔录2019_7_5_1323398167.pdf");
            System.out.print(sc);
            toTextFile("D:\\data\\recordpdfs\\0的笔录2019_7_5_1323398167.pdf", "D:\\data\\recordpdfs\\0的笔录2019_7_5_1323398167.txt");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
